
#!/bin/bash
# Environment preparation for the distributed training launch further below.
# NOTE(review): a shebang is only honored when it is the very first line of
# the file -- confirm that nothing (not even a blank line) precedes it.

# Trace every command as it is executed.
set -x
printf 'Starting script execution at %s\n' "$(date)"

# Alternative communication settings, kept for reference:
# export NCCL_IB_DISABLE=0
# export NCCL_IB_HCA=mlx5
# export NCCL_DEBUG=INFO
# export NCCL_SOCKET_IFNAME=eth0,ibp1s0
# export GLOO_SOCKET_IFNAME=eth0,ibp1s0
# export NCCL_DEBUG_SUBSYS=ALL

# Active NCCL / Gloo network settings: InfiniBand is left enabled
# (NCCL_IB_DISABLE=0), four mlx5 HCAs are listed, and both libraries are
# pointed at the eth0 interface for socket traffic.
export \
  NCCL_IB_DISABLE=0 \
  NCCL_IB_HCA=mlx5_0,mlx5_1,mlx5_2,mlx5_3 \
  NCCL_SOCKET_IFNAME=eth0 \
  GLOO_SOCKET_IFNAME=eth0

# Disable tokenizers parallelism and print the value that was set.
export TOKENIZERS_PARALLELISM=false
printf 'TOKENIZERS_PARALLELISM: %s\n' "${TOKENIZERS_PARALLELISM}"

# Allocator configuration string exported for the PyTorch HIP backend.
export PYTORCH_HIP_ALLOC_CONF=expandable_segments:True

# export NCCL_IB_GID_INDEX=3

# # Conda activation (must be executed on all nodes)
# source /m2v_intern/liujie/miniconda3/etc/profile.d/conda.sh
# conda deactivate
# conda activate /m2v_intern/liujie/miniconda3/envs/flow_grpo

# Project root directory (modify according to actual path).
PROJECT_ROOT="/data01/lyl/step4_1024_multi_nodes/step4_gen_0707/DiffusionNFT"

# The training script below is referenced by a relative path, so the launch
# must happen from inside the project root. Abort instead of starting torchrun
# from the wrong directory when the path is missing or inaccessible.
cd "$PROJECT_ROOT" || {
  echo "ERROR: cannot change directory to PROJECT_ROOT: $PROJECT_ROOT" >&2
  exit 1
}

# Address and port handed to torchrun as --master_addr / --master_port.
# MASTER_ADDR=172.16.107.207
MASTER_ADDR=172.16.168.187
MASTER_PORT=19000

# Start this node's workers: the job is declared as 3 nodes with 4 processes
# per node, and this copy of the script launches node rank 2.
torchrun \
  --nnodes=3 \
  --nproc_per_node=4 \
  --node_rank=2 \
  --master_addr="$MASTER_ADDR" \
  --master_port="$MASTER_PORT" \
  scripts/train_nft_sd3.py \
  --config "$PROJECT_ROOT/config/nft.py:sd3_geneval"

# MASTER_PORT=19001
# MASTER_ADDR=172.16.247.71
# sd3_multi_reward   sd3_geneval
# # MASTER_ADDR=172.16.247.104
# RANK=0
# # MASTER_ADDR=10.8.160.45
# # Launch command (parameters automatically read from accelerate_multi_node.yaml)
# accelerate launch --config_file $PROJECT_ROOT/scripts/accelerate_configs/multi_node.yaml \
#     --num_machines 2 --num_processes 8 \
#     --machine_rank ${RANK} --main_process_ip ${MASTER_ADDR} --main_process_port ${MASTER_PORT} \
#     scripts/train.py \
#     --config config/grpo.py:geneval_sd3

